The goal of this markdown is to visualize the antibody reactivity data from multiplex immunoassays performed by Tenzin Tashi and compiled by Aditi Upadhye for the CTC PDT funded project “Evaluating the dynamics and function of naturally acquired humoral immune responses to Plasmodium vivax vaccine candidate antigens in a longitudinal study.”
The data consists of IgG responses specific against several P. vivax antigens using plasma obtained for Brazilian patients with acute vivax malaria and followed up at days 30, 60, and 180 after initial enrollment. This is an initial attempt to analyze the decay of antibody responses over time.
The major findings of this study to date are:
Table 1. Participant characteristics
Fig. 1. Correlations of antigen-specific IgG responses
Fig. 2. Seropositivity at each timepoint by antigen for subjects with data
Fig. 3. Reactivity over time points, faceted by Pv antigen, as ln(max FI) per subject, exponential decay
Fig. 4 Forest Plot comparing Tenshi et al vs Longley et al
Table 2. Half-lives, linear-mixed effects regression, unadjusted analysis
Table S2. Half-lives, linear-mixed effects regression, adjusted analysis
#read in all data
alldat <- readRDS(paste0(datadir,"Brazil Pvivax dataframe Aditi 02282022.rds")) %>%
dplyr::filter(Dilution == "1:400") %>%
mutate(Antigen = gsub("DBP-FL", "PvDBP-FL", Antigen)) %>%
mutate(Antigen = gsub("MSP3a", "PvMSP3a", Antigen)) %>%
mutate(Antigen = gsub("EBP", "PvEBP", Antigen)) %>%
mutate(Antigen = gsub("CD4", "ratCD4", Antigen)) %>%
mutate(Antigen = factor(Antigen, levels = c("BSA","ratCD4", "Thioredoxin", "Tetanus Toxoid",
"Pv12",
"Pv41",
"PvDBP-FL",
"PvEBP",
"PvGAMA",
"PvMSP3a",
"PvRBP1a",
"PvRBP2b",
"Pvx081550"))) %>%
mutate(Antigen = gsub("Pvx081550", "PVX_081550", Antigen)) %>%
mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen)) %>%
mutate(AntigenType = factor(AntigenType, levels = c("background control", "reactivity control", "P. vivax antigen")))
#read in cleaned data
alldat_norm_bkgd_rem <- readRDS(paste0(datadir, "alldat_norm_bkgd_rem.rds")) %>%
mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen))
## Stratified by City
## Overall
## n 47
## Gender (%) 34 (72.3)
## Age, years (median [range]) 32.0 [12.0, 64.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 30.0]
## Years residence in Amazon basin (median [range]) 26.0 [7.0, 64.0]
## Stratified by City
## Acrelândia
## n 10
## Gender (%) 7 (70.0)
## Age, years (median [range]) 31.0 [14.0, 55.0]
## Number of prior malaria episodes (median [range]) 1.0 [0.0, 6.0]
## Years residence in Amazon basin (median [range]) 21.0 [8.0, 43.0]
## Stratified by City
## Califónia
## n 6
## Gender (%) 3 (50.0)
## Age, years (median [range]) 41.0 [29.0, 56.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 15.0]
## Years residence in Amazon basin (median [range]) 33.0 [7.0, 54.0]
## Stratified by City
## Jéssica
## n 19
## Gender (%) 15 (78.9)
## Age, years (median [range]) 30.0 [18.0, 56.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 30.0]
## Years residence in Amazon basin (median [range]) 30.0 [18.0, 54.0]
## Stratified by City
## Plácido p
## n 12
## Gender (%) 9 (75.0) 0.576
## Age, years (median [range]) 23.0 [12.0, 64.0] 0.183
## Number of prior malaria episodes (median [range]) 2.0 [0.0, 10.0] 0.211
## Years residence in Amazon basin (median [range]) 19.0 [7.0, 64.0] 0.033
## Stratified by City
## test
## n
## Gender (%)
## Age, years (median [range]) nonnorm
## Number of prior malaria episodes (median [range]) nonnorm
## Years residence in Amazon basin (median [range]) nonnorm
Table 1. Baseline characteristics of study participants | |||||||
Variable | Overall | Acrelândia | Califónia | Jéssica | Plácido | p | test |
n | 47 | 10 | 6 | 19 | 12 | ||
Gender (%) | 34 (72.3) | 7 (70.0) | 3 (50.0) | 15 (78.9) | 9 (75.0) | 0.576 | |
Age, years (median [range]) | 32.0 [12.0, 64.0] | 31.0 [14.0, 55.0] | 41.0 [29.0, 56.0] | 30.0 [18.0, 56.0] | 23.0 [12.0, 64.0] | 0.183 | nonnorm |
Number of prior malaria episodes (median [range]) | 3.0 [0.0, 30.0] | 1.0 [0.0, 6.0] | 3.0 [0.0, 15.0] | 3.0 [0.0, 30.0] | 2.0 [0.0, 10.0] | 0.211 | nonnorm |
Years residence in Amazon basin (median [range]) | 26.0 [7.0, 64.0] | 21.0 [8.0, 43.0] | 33.0 [7.0, 54.0] | 30.0 [18.0, 54.0] | 19.0 [7.0, 64.0] | 0.033 | nonnorm |
Numbers are n. (%) unless otherwise noted. Fisher's exact test used for categorical variables. Kruskal-wallis Rank Sum Test used for continuous variables. | |||||||
Make correlation matrices for each timepoint
Assess number of samples per timepoint
## `summarise()` has grouped output by 'Antigen'. You can override using the
## `.groups` argument.
## # A tibble: 36 × 3
## # Groups: Antigen [9]
## Antigen Timepoint n
## <chr> <dbl> <int>
## 1 Pv12 0 44
## 2 Pv12 30 40
## 3 Pv12 60 22
## 4 Pv12 180 21
## 5 Pv41 0 44
## 6 Pv41 30 40
## 7 Pv41 60 22
## 8 Pv41 180 21
## 9 PvDBP-FL 0 44
## 10 PvDBP-FL 30 40
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
Plot correlation heatmaps by timepoint
day 0
corrplot::corrplot(timepoint_0_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_0_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 30
corrplot::corrplot(timepoint_30_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_30_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 60
corrplot::corrplot(timepoint_60_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_60_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 180
corrplot::corrplot(timepoint_180_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_180_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
Before preceding further, we want to determine what a seropositive is using the North American controls to set a threshold for positivity (mean + 3 st dev).
We first filter out the North American controls, noting that only all were ran on Plate 1 except for NAC 1, which was run on all plates. For consistency, and being that we already normalized based on the positive control standard, we will only use NACs on Plate 1 for this.
Evaluate seroprevalence (% seropositive) for each timepoint, faceted by antigen
alldat_norm_bkgd_rem %>%
filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
(#fig:seroprevalence by antigen)Seroprevalence at each timepoint by antigen
The data shows that >50% seroprevalence is maintained for Pv41, PvGAMA, and PvRBP2 from acute presentation all the way up to 180 days. Pv12 and PvEBP also demonstrate more prolonged maintenance of seropositivity 180 days from acute vivax malaria.
We take advantage of the longitudinal data to ask how many people are seropositive at admission and either seroconvert or serorevert over time?
Subjects with all four time points
mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)
#with 0, 30, 60, 180
subjects_with_0_3_60_180 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0 & `180` != 0) %>%
.$Subject
length(unique(subjects_with_0_3_60_180))
## [1] 8
alldat_norm_bkgd_rem %>%
filter(Subject %in% subjects_with_0_3_60_180) %>%
filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
filter(!is.na(Timepoint)) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(subject = factor(Subject)) %>%
mutate(reactivity = factor(seropositive, levels = c(0,1), labels = c("seronegative","seropositive"))) %>%
select(subject, Antigen, timepoint, FI_norm_bkgd_rem, reactivity, seropositive) %>%
group_by(Antigen, timepoint) %>%
mutate(subject = fct_reorder(subject, FI_norm_bkgd_rem, .desc = FALSE)) %>%
ungroup() %>%
ggplot(., aes(x=timepoint, y = subject, fill = reactivity)) +
geom_tile(color = "white", size = 0.1) +
scale_fill_manual(values = c("gray80", "black")) +
#scale_fill_viridis(name="reactivity", discrete = TRUE, option = "magma", direction = -1) +
coord_equal() +
facet_wrap(~Antigen, nrow = 1) +
labs(x="days from acute vivax", y="subjects", title = "Seropositivity over time") +
ggthemes::theme_tufte(base_family="Helvetica") +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=12),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.text=element_text(hjust=0),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
legend.title=element_text(size=8),
legend.title.align=1,
legend.text=element_text(size=8),
legend.position="bottom",
legend.key.size=unit(0.2, "cm"),
legend.key.width=unit(1, "cm"))
Lastly, we can also visualize as percent of max, which also does not need any normalization or background subtraction given these are canceled out. Hence, raw FI is used.
Strategy is to group by subject and antigen and divide raw FI over max(FI) and express as a percentage.
alldat_norm_bkgd_rem_pctmax <- alldat_norm_bkgd_rem %>%
group_by(Plate, Subject, Antigen) %>%
mutate(FI_pct_max = FI_norm_bkgd_rem/max(FI_norm_bkgd_rem, na.rm = TRUE)) %>%
ungroup()
Determine subjects who have data at days 0 and 30 and days 0, 30, 60.
mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)
#subjects with at least day 0 and 30 data
subjects_with_0_30 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0) %>%
.$Subject
length(unique(subjects_with_0_30))
## [1] 37
#with 0, 30, 60
subjects_with_0_3_60 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0) %>%
.$Subject
length(unique(subjects_with_0_3_60))
## [1] 19
Estimate the curve from day 0 through day 180 as exponential decay. For this, we determine the timepoint with the max FI for each antigen for each subject, and then analyze only values from the max timepoint onward to give the most accurate model of decay. This is because values prior to the max are not needed to determine decay and actually makes the decay model less accurate
You can plot the exponential decay fit using the following: (see https://stackoverflow.com/questions/41101841/exponential-fit-in-ggplot-r). By taking the natural log of the FI, you can then plot as linear decay.
Decay be approximated by as simple linear model. Since we’re using repeated measures, we will opt for a mixed effects model with random intercepts for subjects. Since we have different slopes for each antigen, we will use the do and tidy functions applied to lmer.
Multiple linear regression with ln(reactivity) as the dependent variable and time (days) as the independent variable in separate models unadjusted or adjusted for years living in the malaria-endemic Amazon region. A separate model was run for each antigen. Half-life (t1/2) was calculated as ln(0.5)/estimate and further converted to months by dividing by 30. P values were adjusted for multiple comparisons (antigens) using the Benjamini-Hochberg (BH) procedure.
Requirements 1. Use only data from timepoint with max FI for each subject and antigen onward so as to accurately model decay 2. Select only subjects with at least two datapoints from max timepoint onward
myantigens <- c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid")
#not usd for manuscript
#https://academic.oup.com/abt/article/4/3/144/6309336
#tetanus antibodies: https://www.nejm.org/doi/full/10.1056/NEJMoa066092
library(lmerTest)
halflife_res_mixed <- alldat_norm_bkgd_rem_pctmax %>%
select(c(Subject, Group, AntigenType, Antigen, Timepoint, Years_Endemic, gender, age, FI_norm_bkgd_rem, FI_pct_max)) %>%
filter(Group == "Brazilian") %>% #filter only Brazilian samples
filter(Subject != "pos_control") %>% #remove positive controls
filter(!is.nan(FI_pct_max)) %>% #remove NaNs
filter(Antigen %in% myantigens) %>% #filter on only Pv antigens that are have seroprevalence >50%
left_join(., alldat %>%
filter(Group == "Brazilian") %>% #filter only Brazilian samples
filter(Subject != "pos_control") %>% #remove positive controls
select(Subject, "Number of prior malaria episodes") %>%
rename(Prior_malaria = "Number of prior malaria episodes") %>%
distinct(Subject, .keep_all = TRUE) %>%
mutate(Prior_malaria = as.integer(Prior_malaria)),
by = "Subject") %>%
mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid"))) %>%
mutate(Years_Endemic = as.numeric(Years_Endemic)) %>% #this line is the one that gives the "NAs introduced by coercion" message
mutate(Years_Endemic = ifelse(is.na(Years_Endemic),median(Years_Endemic, na.rm = TRUE),Years_Endemic)) %>% #impute missing variable for 2 subjects
mutate(Prior_malaria = ifelse(is.na(Prior_malaria),median(Prior_malaria, na.rm = TRUE),Prior_malaria)) %>% #impute missing variable for 4 subjects
mutate(ln_FI_pct_max = log(FI_pct_max+0.0001)) %>%
mutate(ln_FI_norm_bkgd_rem = log(FI_norm_bkgd_rem+1)) %>%
arrange(Subject, Antigen, Timepoint) %>%
group_by(Subject, Antigen) %>%
mutate(days_from_max_pct = Timepoint - Timepoint[which.max(FI_norm_bkgd_rem)]) %>%
filter(between(row_number(),which.max(FI_norm_bkgd_rem), n())) %>% #this line removes all values before the max so that we can estimate decay from the highest available value
filter(n()>1) %>% #filter subjects with at least two data points
ungroup()
#Subject 32 did not have tetanus toxoid response, so removed from analysis
halflife_res_mixed <- halflife_res_mixed %>%
filter(!(Subject == "032" & Antigen == "tetanus toxoid"))
Plot exponential decay
#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all <- halflife_res_mixed %>%
ggplot(., aes(x=days_from_max_pct, y = FI_pct_max + 0.0001, fill= Antigen, group = Antigen, color = Antigen)) +
#geom_point(colour="black",pch=21, size=2.5) +
#geom_smooth(method ="lm", formula = y~x, alpha = 0.15) +
geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
ylab("percent of max antibody reactivity") +
xlab("days after max antibody reactivity") +
theme_bw() +
theme(text = element_text(size=11),
axis.ticks=element_blank(),
#axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
axis.text=element_text(size=6.5),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.background = element_blank(),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
legend.position = "top",
legend.title=element_blank(),
legend.title.align=1,
legend.text=element_text(size=8),
legend.key.size=unit(0.15, "cm"),
legend.key.width=unit(0.15, "cm")) +
scale_color_brewer(palette="Spectral") +
scale_fill_brewer(palette="Spectral")
exponential_decay_plot_all
#refit model
nls_fit <- halflife_res_mixed %>%
nls(formula = FI_pct_max + 0.0001 ~ a*exp(-b *days_from_max_pct),
start = c(a=10, b=0.01), data = .)
coef(nls_fit)
## a b
## 0.951856825 0.009894891
#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all_facet <- exponential_decay_plot_all +
stat_regline_equation(label.y = 1.07, label.x = 100, aes(label = ..adj.rr.label..), color = "gray10", cex = 3) +
theme(legend.position = "none") +
facet_wrap(~Antigen)
exponential_decay_plot_all_facet
Plot natural log of FI to linearize
#add small prior to FI_pct_max to avoid taking log of zero
linear_decay_plot_facet <- halflife_res_mixed %>%
ggplot(., aes(x=days_from_max_pct, y = ln_FI_pct_max, group = Antigen, fill= Antigen, color = Antigen)) +
geom_point(colour="black",pch=21, size=2.5) +
geom_smooth(method ="lm", formula = y~x, alpha = 0.15) +
#geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
#geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
#scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
ylab("ln(proportion max reactivity)") +
xlab("days after max antibody reactivity") +
theme_bw() +
scale_color_brewer(palette="Spectral") +
scale_fill_brewer(palette="Spectral") +
#stat_regline_equation(label.y = -5, label.x = 5, aes(label = ..rr.label..), color = "gray10") +
stat_regline_equation(label.y = -6, label.x = 5, aes(label = ..adj.rr.label..), color = "gray10", cex = 3) +
facet_wrap(~Antigen) +
theme(text = element_text(size=11),
axis.ticks=element_blank(),
#axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
axis.text=element_text(size=6.5),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.background = element_blank(),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
# legend.title=element_text(size=8),
# legend.title.align=1,
# legend.text=element_text(size=8),
# legend.key.size=unit(0.2, "cm"),
# legend.key.width=unit(1, "cm"),
legend.position="none")
linear_decay_plot_facet
halflife_res_mixed <- halflife_res_mixed %>%
left_join(., alldat %>%
dplyr::select(c(Subject, city)) %>%
filter(!duplicated(Subject)) %>%
drop_na(),
by = "Subject")
library(tableone)
library(flextable)
library(officer)
source("/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/customtab.R")
foo1 <- lmer_unadj_res %>%
dplyr::select(Antigen, estimate, std.error, statistic, halflife_days, halflife_days_lowCI, halflife_days_highCI)
# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table 2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days) and subject as fixed and random effects, respectively. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_unadj_res <- custom_tab(foo1, header, footer)
Table 2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity | ||||||
Antigen | estimate | std.error | statistic | halflife_days | halflife_days_lowCI | halflife_days_highCI |
tetanus toxoid | -0.0016 | 0.00031 | -5.3 | 430 | 310 | 690 |
PVX_081550 | -0.0067 | 0.00079 | -8.5 | 100 | 83 | 130 |
PvRBP2b | -0.0076 | 0.00072 | -11.0 | 91 | 76 | 110 |
Pv12 | -0.0085 | 0.00120 | -7.3 | 82 | 64 | 110 |
Pv41 | -0.0110 | 0.00100 | -11.0 | 64 | 54 | 79 |
PvEBP | -0.0140 | 0.00190 | -7.0 | 51 | 39 | 71 |
PvDBP-FL | -0.0160 | 0.00230 | -6.9 | 44 | 34 | 63 |
PvMSP3a | -0.0170 | 0.00220 | -7.7 | 41 | 32 | 55 |
PvGAMA | -0.0210 | 0.00180 | -12.0 | 32 | 28 | 39 |
A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days) and subject as fixed and random effects, respectively. Half-life (t1/2) was calculated as ln(0.5)/estimate. | ||||||
library(tableone)
library(flextable)
library(officer)
source("/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/customtab.R")
foo2 <- lmer_adj_res %>%
dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days, halflife_days_lowCI, halflife_days_highCI)
# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table S2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity, adjusted analysis", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days), sex, age, city, and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_adj_res <- custom_tab(foo2, header, footer)
Table S2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity, adjusted analysis | |||||||
Antigen | estimate | std.error | BH-adjusted.p.value | significant | halflife_days | halflife_days_lowCI | halflife_days_highCI |
tetanus toxoid | -0.0016 | 0.00031 | 9.10e-05 | * | 430 | 310 | 690 |
PVX_081550 | -0.0068 | 0.00079 | 2.70e-10 | * | 100 | 83 | 130 |
PvRBP2b | -0.0076 | 0.00072 | 1.00e-11 | * | 91 | 76 | 110 |
Pv12 | -0.0086 | 0.00120 | 7.20e-08 | * | 81 | 63 | 110 |
Pv41 | -0.0110 | 0.00100 | 1.20e-11 | * | 64 | 54 | 79 |
PvEBP | -0.0140 | 0.00190 | 3.50e-07 | * | 51 | 40 | 72 |
PvDBP-FL | -0.0150 | 0.00230 | 6.70e-07 | * | 45 | 35 | 65 |
PvMSP3a | -0.0170 | 0.00220 | 3.50e-07 | * | 41 | 32 | 56 |
PvGAMA | -0.0210 | 0.00180 | 1.30e-12 | * | 32 | 28 | 39 |
A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days), sex, age, city, and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate. | |||||||
unadj_and_adj <- lmer_unadj_res %>%
dplyr::select(-c(effect, df, group, term, halflife_months, halflife_years, significant)) %>%
left_join(., lmer_adj_res %>%
dplyr::select(-c(effect, df, group, term, halflife_months, halflife_years, significant)),
by = "Antigen")
colnames(unadj_and_adj) <- gsub("\\.x", "_unadj", colnames(unadj_and_adj))
colnames(unadj_and_adj) <- gsub("\\.y", "_adj", colnames(unadj_and_adj))
longley_dat_clean <- longley_dat %>%
filter(site == "Brazil") %>%
#mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "Pv12", "PvMSP3a"))) %>%
rename(Subject = `Subject ID`) %>%
mutate(Week = as.numeric(gsub("w", "", visit))) %>%
mutate(Day = Week*7) %>%
mutate(Month = Week/4) %>%
arrange(Subject, Antigen, Week) %>%
select(Subject, Antigen, visit, Day, Week, Month, response) %>%
group_by(Subject, Antigen, visit, Day, Week, Month) %>%
summarise_at(vars(response), mean, na.rm = TRUE) %>%
filter(Week != 36) %>% # responses at week 36 go up, so only use data from before week 36
ungroup() %>%
group_by(Subject, Antigen) %>%
mutate(days_from_max_pct = Day - Day[which.max(response)]) %>%
filter(between(row_number(),which.max(response), n())) %>% #this line removes all values before the max so that we can estimate decay from the highest available value
filter((response > 0 & Day == 0) | Day > 0) %>% #filter subjects with zero response
filter(n()>1) %>% #filter subjects with at least two data points
ungroup() %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_"))
#remove subjects where all values are zero
longley_dat_keep <- longley_dat_clean %>%
group_by(Subject, Antigen) %>%
summarize(sum = sum(response)) %>%
filter(sum > 0) %>%
ungroup() %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_"))
longley_dat_filtered <- longley_dat_clean %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_")) %>%
filter(dummy_name %in% longley_dat_keep$dummy_name)
lmer_adj_res_longley <- longley_dat_filtered %>%
select(Subject, Antigen, Day, response) %>%
mutate(ln_response = log(response+1)) %>% #add 1 to all values to avoid taking log of zero
group_by(Antigen) %>%
do(broomExtra::tidy(lmerTest::lmer(ln_response~Day + (1|Subject), data = .), conf.int = TRUE)) %>%
ungroup() %>%
mutate(p.value = as.numeric(formatC(p.value, format = "e", digits = 2))) %>%
mutate("BH-adjusted.p.value" = as.numeric(p.adjust(p.value, method = "BH"))) %>%
mutate(halflife_days_longley_recalc = log(0.5)/estimate) %>%
mutate(halflife_days_lowCI_longley_recalc = log(0.5)/conf.low) %>%
mutate(halflife_days_highCI_longley_recalc = log(0.5)/conf.high) %>%
mutate(halflife_months_longley_recalc = halflife_days_longley_recalc/30) %>%
mutate(halflife_years_longley_recalc = halflife_days_longley_recalc/365.25) %>%
mutate(across(where(is.numeric), signif, 2)) %>%
dplyr::filter(term == "Day") %>%
mutate(significant = ifelse(.$"BH-adjusted.p.value"<0.05, "*", "NS")) %>%
mutate("BH-adjusted.p.value" = formatC(.$"BH-adjusted.p.value", format = "e", digits = 2)) %>%
arrange(desc(halflife_days_longley_recalc)) %>%
dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days_longley_recalc, halflife_days_lowCI_longley_recalc, halflife_days_highCI_longley_recalc)
lmer_adj_res_longley_short <- lmer_adj_res_longley %>%
dplyr::select(Antigen, contains("halflife"))
lmer_adj_res_longley_recalc <- protein_dat %>%
right_join(., lmer_unadj_res,
by = "Antigen") %>%
left_join(., lmer_adj_res_longley_short,
by = "Antigen") %>%
select(Antigen, Protein, everything()) %>%
rename(PlasmoDB_Accession = Protein) %>%
mutate(overlapping_CIs = ifelse(
(halflife_days_highCI_longley_recalc < halflife_days_lowCI) |
(halflife_days_highCI < halflife_days_lowCI_longley_recalc) ,
"no", "yes"))
lmer_adj_res_longley_recalc %>%
filter(overlapping_CIs == "yes") %>%
select(Antigen)
## Antigen
## 1 Pv12
## 2 Pv41
## 3 PVX_081550
## 4 PvRBP2b
lmer_adj_res_longley_recalc %>%
filter(overlapping_CIs == "no") %>%
select(Antigen)
## Antigen
## 1 PvMSP3a
## 2 PvGAMA
## 3 PvDBP-FL